# -*- coding: utf-8 -*-
import scrapy
import re
from book_url.items import BookUrlItem
from scrapy_redis.spiders import RedisSpider


class BooksUrlSpider(RedisSpider):
    """Collect book detail URLs from Douban tag listing pages.

    For every listing page the spider yields one ``BookUrlItem`` per book
    link found (``book_id`` and ``book_url`` fields) and then follows the
    "next page" link, if there is one, back into :meth:`parse`.
    """

    name = 'books_url'
    # Key handed to RedisSpider; per scrapy_redis this is where start URLs
    # are read from.
    redis_key = 'book:tags_url'
    # Kept as a public attribute for compatibility; pagination now resolves
    # links against the response URL instead of concatenating with this.
    base_url = 'https://book.douban.com'

    # Compiled once; raw string with escaped dots so only the literal host
    # matches and no invalid-escape warning is raised for ``\d``.
    _SUBJECT_ID_RE = re.compile(r'https://book\.douban\.com/subject/(\d+)/')

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the parent spider."""
        super().__init__(**kwargs)

    def parse(self, response):
        """Yield book items for one listing page, then the next-page request.

        Args:
            response: The downloaded listing page.

        Yields:
            ``BookUrlItem`` objects for each book link whose URL contains a
            subject id, followed by a ``scrapy.Request`` for the next page
            when the page has a next-page link.
        """
        for book_url in response.css('.subject-item .pic a::attr(href)').getall():
            match = self._SUBJECT_ID_RE.search(book_url)
            if match is None:
                # A link that is not a subject page should not abort the
                # rest of the listing.
                self.logger.warning('Skipping unrecognised book url: %s', book_url)
                continue
            # Build a fresh item per book: reusing one instance would let
            # later iterations overwrite items already handed downstream.
            book_item = BookUrlItem()
            book_item['book_id'] = match.group(1)
            book_item['book_url'] = book_url
            yield book_item

        next_href = response.css('span.next a::attr(href)').get()
        if not next_href:
            # No next-page link: this was the last page, stop paginating.
            return
        # urljoin resolves relative, root-relative and absolute hrefs alike.
        next_url = response.urljoin(next_href)
        self.logger.debug('下一页地址url：%s', next_url)
        yield scrapy.Request(url=next_url, callback=self.parse)
